Load libraries and data

library(Seurat)
library(scDblFinder)
library(knitr)
library(ggbeeswarm)
library(ggh4x)
library(grDevices)
library(gridExtra)
library(tidyverse)

set.seed(4)

# Top-level folder where script outputs should be stored
script_output_dir <- file.path(here::here(), "output")

day3_singlets <- readRDS(file = file.path(script_output_dir, "processed_data/2_dm_singlets_d3.rds"))
day15_singlets <- readRDS(file = file.path(script_output_dir, "processed_data/2_dm_singlets_d15.rds"))

DefaultAssay(day3_singlets) <- "RNA"
DefaultAssay(day15_singlets) <- "RNA"

d3_qc_counts <- readRDS(file = file.path(script_output_dir, "processed_data/2_d3_qc_df.rds"))
d15_qc_counts <- readRDS(file = file.path(script_output_dir, "processed_data/2_d15_qc_df.rds"))

Visualize feature counts, umi counts, percent mitochondria

day3_singlets[["percent.mt"]] <- PercentageFeatureSet(day3_singlets, pattern = "^MT-")
day15_singlets[["percent.mt"]] <- PercentageFeatureSet(day15_singlets, pattern = "^MT-")

Day 3

VlnPlot(day3_singlets, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Day 15

VlnPlot(day15_singlets, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

Filter

Starting number of cells

Day 3

table(day3_singlets$PTID)
## 
##        1        5        7        8        9       10       11       12 
##      762     1250     1257     1612     1423     1404        5     1461 
##       13       16  Doublet Negative 
##     1634     2245        0        0
length(day3_singlets$orig.ident)
## [1] 13053

Day 15

table(day15_singlets$PTID)
## 
##        1        5        7        8        9       10       11       12 
##     1191        0       16        9     1286      300     1442        0 
##       13       16  Doublet Negative 
##      730      475        0        0
length(day15_singlets$orig.ident)
## [1] 5449

Our cutoffs:

  • feature counts > 200
  • mitochondrial counts < 10%

Note that for Jim Kublin’s talk in the spring we used < 5% mitochondrial counts

# Day 3
day3 <- subset(day3_singlets, subset = nFeature_RNA > 200)
d3_200_gene_count <- length(day3$orig.ident)  # Save QC counts
day3 <- subset(day3, subset = percent.mt < 10)
d3_10_mt_count <- length(day3$orig.ident)  # Save QC counts

# Day 15
day15 <- subset(day15_singlets, subset = nFeature_RNA > 200)
d15_200_gene_count <- length(day15$orig.ident)  # Save QC counts
day15 <- subset(day15, subset = percent.mt < 10)
d15_10_mt_count <- length(day15$orig.ident)  # Save QC counts

rm(day3_singlets)
rm(day15_singlets)

Current number of cells

Day 3

table(day3$PTID)
## 
##        1        5        7        8        9       10       11       12 
##      754     1245     1212     1606     1399     1332        5     1446 
##       13       16  Doublet Negative 
##     1622     2231        0        0
length(day3$orig.ident)
## [1] 12852

Day 15

table(day15$PTID)
## 
##        1        5        7        8        9       10       11       12 
##     1093        0       10        5     1188      280     1352        0 
##       13       16  Doublet Negative 
##      682      431        0        0
length(day15$orig.ident)
## [1] 5041

Identify within-sample doublets

Remove cells from hashtags that didn’t work (<= 10 cells). Low cell numbers cause scDblFinder errors.

day3 <- subset(day3, subset = PTID %in% c("11"), invert = TRUE)
day15 <- subset(day15, subset = PTID %in% c("5", "7", "8", "12"), invert = TRUE)
doublet_finder <- function(in_obj) {
  sce <- as.SingleCellExperiment(in_obj)
  sce <- scDblFinder(sce, samples = "PTID")
  in_obj$scDblFinder.class <- sce@colData$scDblFinder.class
  in_obj$scDblFinder.score <- sce@colData$scDblFinder.score
  in_obj$scDblFinder.weighted <- sce@colData$scDblFinder.weighted
  in_obj$scDblFinder.class <- factor(in_obj$scDblFinder.class, levels=c("singlet","doublet"))

  return(in_obj)
}

day3 <- doublet_finder(day3)
## Warning: Layer 'data' is empty
## Warning: Layer 'scale.data' is empty
day15 <- doublet_finder(day15)
## Warning: Layer 'data' is empty
## Layer 'scale.data' is empty

Scatter plots

f1 <- FeatureScatter(day3,
                     feature1 = "nCount_RNA",
                     feature2 = "nFeature_RNA",
                     group.by = "scDblFinder.class",
                     cols = c(alpha("blue", 0.01), alpha("red", 0.5))) +
  scale_y_continuous(trans = "log10") +
  scale_x_continuous(trans = "log10") +
  ggtitle("Day 3")
f2 <- FeatureScatter(day15,
                     feature1 = "nCount_RNA",
                     feature2 = "nFeature_RNA",
                     group.by = "scDblFinder.class",
                     cols = c(alpha("blue", 0.01), alpha("red", 0.5))) +
  scale_y_continuous(trans = "log10") +
  scale_x_continuous(trans = "log10") +
  ggtitle("Day 15")
f1 + f2

Doublet count per PTID

Day 3

table(day3$scDblFinder.class, day3$PTID)
##          
##              1    5    7    8    9   10   11   12   13   16 Doublet Negative
##   singlet  716 1188 1163 1508 1331 1263    0 1376 1536 2097       0        0
##   doublet   38   57   49   98   68   69    0   70   86  134       0        0

Day 15

table(day15$scDblFinder.class, day15$PTID)
##          
##              1    5    7    8    9   10   11   12   13   16 Doublet Negative
##   singlet 1045    0    0    0 1130  256 1243    0  642  415       0        0
##   doublet   48    0    0    0   58   24  109    0   40   16       0        0

Keep singlets

day3 <- subset(day3, subset = scDblFinder.class == "singlet")
day15 <- subset(day15, subset = scDblFinder.class == "singlet")

# Save QC counts
d3_gex_singlet_count <- length(day3$orig.ident)
d15_gex_singlet_count <- length(day15$orig.ident)

saveRDS(day3, file = file.path(script_output_dir, "processed_data/3_singlets_d3.rds"))
saveRDS(day15, file = file.path(script_output_dir, "processed_data/3_singlets_d15.rds"))
day3 <- readRDS(file = file.path(script_output_dir, "processed_data/3_singlets_d3.rds"))
day15 <- readRDS(file = file.path(script_output_dir, "processed_data/3_singlets_d15.rds"))

QC plots

Step bar graph

# Prep dataframes
d3qc <- d3_qc_counts %>%
  dplyr::rename("1. Cell Ranger filtered feature output" = "d3_start_count",
                "2. Demultiplex with HTOs" = "d3_hto_singlet_count") %>%
  add_column("3. >200 unique genes/cell" = d3_200_gene_count, 
             "4. <10% MT counts" = d3_10_mt_count, 
             "5. Singlets by GEX" = d3_gex_singlet_count) %>%
  tidyr::pivot_longer(everything(), names_to = "Step", values_to = "Count")

d15qc <- d15_qc_counts %>%
  dplyr::rename("1. Cell Ranger filtered feature output" = "d15_start_count",
                "2. Demultiplex with HTOs" = "d15_hto_singlet_count") %>%
  add_column("3. >200 unique genes/cell" = d15_200_gene_count, 
             "4. <10% MT counts" = d15_10_mt_count, 
             "5. Singlets by GEX" = d15_gex_singlet_count) %>%
  tidyr::pivot_longer(everything(), names_to = "Step", values_to = "Count")

write_csv(d3qc, file = file.path(script_output_dir, "processed_data/d3qc_barplot.csv"))
write_csv(d15qc, file = file.path(script_output_dir, "processed_data/d15qc_barplot.csv"))


# Plot
make_qc_graph <- function(counts, title) {
  qc_plot <- ggplot(counts, aes(x = Step, y = Count)) +
    geom_col() +
    theme_bw() +
    geom_text(aes(label = Count),
              position = position_stack(vjust = 0.5),
              color = "white",
              size = 4) +
    theme(text = element_text(family = "Arial"),
          axis.title.x = element_text(size = 8),
          axis.title.y = element_blank(),
          axis.text.x = element_text(color = "black", size = 8),
          axis.text.y = element_text(color = "black", size = 8),
          plot.title = element_text(size = 8, hjust = 0.5),
          panel.grid = element_blank(),
          panel.border = element_blank(),
          axis.line.y.left = element_line(color = "black"),
          axis.line.x.bottom = element_line(color = "black")) +
    scale_y_continuous(n.breaks = 5) +
    scale_x_discrete(limits = rev) +
    coord_flip() +
    labs(y = "# of cell barcodes remaining after QC",
         title = title)
}

d3qc_plot <- make_qc_graph(d3qc, "Day 3")
d3qc_plot

d15qc_plot <- make_qc_graph(d15qc, "Day 15")
d15qc_plot

# Day 3
ggsave(file.path(script_output_dir, "plots/SuppFig4_qc_barplot_d3.png"), plot = d3qc_plot,
       dpi = 300, width = 4, height = 2, device = "png")

cairo_pdf(file = file.path(script_output_dir, "plots/SuppFig4_qc_barplot_d3.pdf"), 
          width=4, height=2, bg = "transparent", family = "Arial")
print(d3qc_plot)
dev.off()
## png 
##   2
# Day 15
ggsave(file.path(script_output_dir, "plots/SuppFig4_qc_barplot_d15.png"), plot = d15qc_plot,
       dpi = 300, width = 4, height = 2, device = "png")

cairo_pdf(file = file.path(script_output_dir, "plots/SuppFig4_qc_barplot_d15.pdf"), 
          width=4, height=2, bg = "transparent", family = "Arial")
print(d15qc_plot)
dev.off()
## png 
##   2

How many cells do we have to work with now?

Dot plots

d3_count_df <- day3@meta.data %>%
  group_by(ARM, PTID) %>%
  tally()

d15_count_df <- day15@meta.data %>%
  group_by(ARM, PTID) %>%
  tally()

ptid_colors <- c("1" = "salmon", "5" = "orange", "7" = "#7570B3",
                 "8" = "#0CB702", "9" = "#A6761D", "10" = "#00BFC4",
                 "11" = "#C77CFF", "12" = "#FF61CC", "13" = "darkgreen",
                 "16" = "darkblue")


make_dotplot <- function(count_df, day, ptid_colors, no_legend) {
  count_pval <- wilcox.test(n ~ ARM, data = count_df, paired = FALSE)$p.value
  
  count_test_df <- data.frame(p = count_pval) %>%
    mutate(p.text = if_else(p < 0.001, "p<0.001", 
                            paste0("p=", formatC(round(p, 3), format='f', digits=3))))
  
  count_plot <- ggplot(count_df, aes(x = ARM, y = n)) +
    theme_bw() +
    stat_summary(fun = median, geom = "crossbar", width = 0.3) +
    geom_quasirandom(size = 4, shape = 16, width = 0.3, aes(color = PTID)) +
    theme(text = element_text(family = "Arial"),
          axis.title.x = element_blank(),
          axis.title.y = element_text(size=20),
          axis.text.y = element_text(color="black", size=20),
          axis.text.x = element_text(color="black", size=20),
          legend.title = element_text(color="black", size=20),
          legend.text = element_text(color="black", size=20),
          legend.position = "bottom",
          plot.title = element_text(hjust = 0.5, size = 20),
          plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm"),
          panel.grid = element_blank()) +
    scale_color_manual(values = ptid_colors) +
    labs(title = as.character(day),
         y = "Cell count per sample") +
    force_panelsizes(rows = unit(3.5, "in"),
                     cols = unit(2, "in"))
  
  if(no_legend){
    count_plot <- count_plot +
      theme(legend.title = element_blank(),
            legend.position="none") 
  }
  
  plot_ylims <- ggplot_build(count_plot)$layout$panel_params[[1]]$y.range

  count_plot <- count_plot +
    annotate("text", x = 1.5, y = plot_ylims[2] + 0.01*diff(plot_ylims),
             label = count_test_df$p.text, size = 5) +
    coord_cartesian(ylim = c(plot_ylims[[1]], plot_ylims[[2]] + 0.09*diff(plot_ylims)))
}

# Dot plots
d3dot <- make_dotplot(d3_count_df, "Day 3", ptid_colors, no_legend = TRUE)
d15dot <- make_dotplot(d15_count_df, "Day 15", ptid_colors, no_legend = TRUE)
grid.arrange(d3dot, d15dot, ncol = 2)

# Get a legend with all the ptids
forleg <- bind_rows(d3_count_df, d15_count_df) %>%
  group_by(ARM, PTID) %>%
  tally() %>%
  ggplot(aes(x = ARM, y = n, color = PTID)) +
  geom_jitter() +
  theme_bw() +
  theme(legend.title = element_text(color="black", size=14),
        legend.text = element_text(color="black", size=14)) +
  guides(colour = guide_legend(override.aes = list(size=4))) +
  scale_color_manual(values = ptid_colors, 
                     labels = c("BCG01", "BCG05", "BCG07", "BCG08", "BCG09",
                                 "BCG10", "BCG11", "BCG12", "BCG13", "BCG16"))
forleg

# Dotplots Day 3
ggsave(file.path(script_output_dir, "plots/SuppFig4_d3dot.png"), plot = d3dot,
     dpi = 300, width = 3, height = 4.5, device = "png")

cairo_pdf(file = file.path(script_output_dir, "plots/SuppFig4_d3dot.pdf"), 
          width=3, height=4.5, bg = "transparent", family = "Arial")
print(d3dot)
dev.off()
## png 
##   2
# Dotplots Day 15
ggsave(file.path(script_output_dir, "plots/SuppFig4_d15dot.png"), plot = d15dot,
     dpi = 300, width = 3, height = 4.5, device = "png")

cairo_pdf(file = file.path(script_output_dir, "plots/SuppFig4_d15dot.pdf"), 
          width=3, height=4.5, bg = "transparent", family = "Arial")
print(d15dot)
dev.off()
## png 
##   2
# Legend
ggsave(file.path(script_output_dir, "plots/SuppFig4_legend.png"), plot = forleg,
   dpi = 300, width = 3, height = 4.5, device = "png")

cairo_pdf(file = file.path(script_output_dir, "plots/SuppFig4_legend.pdf"), 
          width=3, height=4.5, bg = "transparent", family = "Arial")
print(forleg)
dev.off()
## png 
##   2

Tables

# Timepoint, Arm, PTID
df_d15 <- day15@meta.data %>%
  group_by(orig.ident, ARM, PTID) %>%
  tally()
day3@meta.data %>%
  group_by(orig.ident, ARM, PTID) %>%
  tally() %>%
  bind_rows(df_d15) %>%
  dplyr::rename("Timepoint" = "orig.ident",
                "# Cells" = "n") %>%
  kable(align = "l", caption = "Singlets per Timepoint, Arm, PTID")
Singlets per Timepoint, Arm, PTID
Timepoint ARM PTID # Cells
Day3 Non-INH 1 716
Day3 Non-INH 5 1188
Day3 Non-INH 7 1163
Day3 Non-INH 8 1508
Day3 Non-INH 12 1376
Day3 INH 9 1331
Day3 INH 10 1263
Day3 INH 13 1536
Day3 INH 16 2097
Day15 Non-INH 1 1045
Day15 INH 9 1130
Day15 INH 10 256
Day15 INH 11 1243
Day15 INH 13 642
Day15 INH 16 415
# Timepoint, Arm
dff_d15 <- day15@meta.data %>%
  group_by(orig.ident, ARM) %>%
  tally()
day3@meta.data %>%
  group_by(orig.ident, ARM) %>%
  tally() %>%
  bind_rows(dff_d15) %>%
  dplyr::rename("Timepoint" = "orig.ident",
                "# Cells" = "n") %>%
  kable(align = "l", caption = "Singlets per Timepoint, Arm")
Singlets per Timepoint, Arm
Timepoint ARM # Cells
Day3 Non-INH 5951
Day3 INH 6227
Day15 Non-INH 1045
Day15 INH 3686

Cluster

Functions

norm_and_reduce <- function(dm_singlets) {
  dm_singlets <- SCTransform(dm_singlets, 
                             vars.to.regress = "percent.mt",
                             verbose = FALSE)
  dm_singlets <- RunPCA(dm_singlets, verbose = FALSE)  
  dm_singlets <- FindNeighbors(dm_singlets, dims = 1:30)
  dm_singlets <- RunUMAP(dm_singlets, dims = 1:30)
  return(dm_singlets)
}

markers_heatmap <- function(obj) {
  topmarkers <- FindAllMarkers(obj, 
                            only.pos = TRUE, 
                            min.pct = 0.25, 
                            logfc.threshold = 0.25) %>%
      group_by(cluster) %>%
      top_n(n = 8, wt = avg_log2FC)
  
  heatmap <- DoHeatmap(obj, features = topmarkers$gene) + 
    NoLegend()
  return(heatmap)
}

Day 3

Normalize, reduce dimensions, cluster. I find that Louvain clusters are more in line with the differences I see with SingleR annotations at Day 15, so using the same method and resolution here.

day3 <- norm_and_reduce(day3)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
day3_clstr <- FindClusters(day3, verbose = FALSE, resolution = 0.5)

UMAP

p1 <- DimPlot(day3_clstr, pt.size = 0.4, label = TRUE,
              label.size = 5) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
p1

Violin plots

Key markers:

  • Lymphoid: IL7R
  • Myeloid: LYZ
  • Epithelial: S100A2
  • Connective tissue: DCN
  • Nervous tissue: DCT
  • Endothelial: FLT1
  • Muscle tissue: COL3A1
  • Keratinocyte: DMKN (epidermis-specific secreted protein)
v1 <- VlnPlot(day3_clstr, 
        ncol = 4,
        features = c("IL7R", "LYZ", "S100A2", "DCN", "DCT", "FLT1", "COL3A1", "DMKN"))
v1

Feature and RNA counts, percent mito

v2 <- VlnPlot(day3_clstr, 
        ncol = 1,
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"))
v2

Heatmap

h1 <- markers_heatmap(day3_clstr) + ggtitle("Day 3: GEX cluster markers")
## Warning in DoHeatmap(obj, features = topmarkers$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## SLC6A14, SLC34A2, SLCO4A1-AS1, PAX3, TRPM1, TIE1
h1 

Additional plots for QC

UMAPs by PTID and ARM

p2 <- DimPlot(day3_clstr, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p3 <- DimPlot(day3_clstr, pt.size = 0.4, group.by = "ARM",
              label.size = 5) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p2 + p3

Heatmaps by PTID and ARM

Idents(day3_clstr) <- "PTID"
h2 <- markers_heatmap(day3_clstr) + ggtitle("Day 3: PTID markers")
## Warning in DoHeatmap(obj, features = topmarkers$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## PSME2, TSPO, SNHG16
h2

QC: Markers by ARM

Idents(day3_clstr) <- "ARM"
h3 <- markers_heatmap(day3_clstr) + ggtitle("Day 3: ARM markers")
h3

Conclusions

The clusters make sense, except cluster 13 which is probably dying cells. Remove cluster 13.

d3_filtered <- subset(day3_clstr, subset = seurat_clusters == "13", invert = TRUE)

# Remake UMAPs
Idents(d3_filtered) <- "seurat_clusters"
p1 <- DimPlot(d3_filtered, pt.size = 0.4, label = TRUE,
              label.size = 5) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
p1

p2 <- DimPlot(d3_filtered, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p3 <- DimPlot(d3_filtered, pt.size = 0.4, group.by = "ARM",
              label.size = 5) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p2 + p3

p3_with_leg <- DimPlot(d3_filtered, pt.size = 0.4, group.by = "ARM",
              label.size = 5) + 
  ggtitle("DAY 3") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "bottom") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
p3_with_leg

Save

arm_legend <- cowplot::get_legend(p3_with_leg)

# Object
saveRDS(d3_filtered, file = file.path(script_output_dir, "processed_data/4_d3_filtered.rds"))

Day 15

Normalize, reduce dimensions, cluster. I find that Louvain clusters are more in line with the differences I see with SingleR annotations.

day15 <- norm_and_reduce(day15)

day15_clstr <- FindClusters(day15, verbose = FALSE, resolution = 0.5)

UMAP

p11 <- DimPlot(day15_clstr, pt.size = 0.4, label = TRUE,
              label.size = 5) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
p11

Violin plots

Key markers:

  • Lymphoid: IL7R
  • Myeloid: LYZ
  • Epithelial: S100A2
  • Connective tissue: DCN
  • Nervous tissue: DCT
  • Endothelial: FLT1
  • Muscle tissue: COL3A1
  • Keratinocyte: DMKN (epidermis-specific secreted protein)
v11 <- VlnPlot(day15_clstr, 
        ncol = 4,
        features = c("IL7R", "LYZ", "S100A2", "DCN", "DCT", "FLT1", "COL3A1", "DMKN"))
v11

Feature and RNA counts, percent mito

v12 <- VlnPlot(day15_clstr, 
        ncol = 1,
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"))
v12

Heatmap

h11 <- markers_heatmap(day15_clstr) + ggtitle("DAY 15: GEX cluster markers")
## Warning in DoHeatmap(obj, features = topmarkers$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## CLEC12B, BCAN, SLC24A5, AL512308.1, CLDN10-AS1, PLEKHS1, PPARGC1A, CDH6,
## FAM162B, GAMT, GDPD2, LRRC15, BBOX1, KCNK7, NMU
h11 

Additional plots for QC

UMAPs by PTID and ARM

p12 <- DimPlot(day15_clstr, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p13 <- DimPlot(day15_clstr, pt.size = 0.4, group.by = "ARM",
              label.size = 5) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p12 + p13

Heatmaps by PTID and ARM

Idents(day15_clstr) <- "PTID"
h12 <- markers_heatmap(day15_clstr) + ggtitle("Day 15: PTID markers")
## Warning in DoHeatmap(obj, features = topmarkers$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## FAM118A, MIR4435-2HG, SMDT1
h12

QC: Markers by ARM

Idents(day15_clstr) <- "ARM"
h13 <- markers_heatmap(day15_clstr) + ggtitle("Day 15: ARM markers")
## Warning in DoHeatmap(obj, features = topmarkers$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## NUTM2A-AS1, SMDT1
h13

Conclusions

The clusters make sense, except cluster 15 which is probably dying cells. Remove cluster 15.

d15_filtered <- subset(day15_clstr, subset = seurat_clusters == "15", invert = TRUE)

# Remake UMAPs
Idents(d15_filtered) <- "seurat_clusters"
p11 <- DimPlot(d15_filtered, pt.size = 0.4, label = TRUE,
              label.size = 5) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")
p11

p12 <- DimPlot(d15_filtered, pt.size = 0.4, group.by = "PTID", cols = ptid_colors) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p13 <- DimPlot(d15_filtered, pt.size = 0.4, group.by = "ARM",
              label.size = 5) + 
  ggtitle("DAY 15") +
  theme(plot.title = element_text(hjust = 0.5, 
                                  size = 16),
        legend.position = "none") +
  scale_x_discrete("UMAP1") +
  scale_y_discrete("UMAP2")

p12 + p13

Save

# Object
saveRDS(d15_filtered, file = file.path(script_output_dir, "processed_data/4_d15_filtered.rds"))

Session Info

sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/Los_Angeles
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.3             forcats_1.0.0              
##  [3] stringr_1.5.0               dplyr_1.1.3                
##  [5] purrr_1.0.2                 readr_2.1.5                
##  [7] tidyr_1.3.1                 tibble_3.2.1               
##  [9] tidyverse_2.0.0             gridExtra_2.3              
## [11] ggh4x_0.2.6                 ggbeeswarm_0.7.2           
## [13] ggplot2_3.4.4               knitr_1.45                 
## [15] scDblFinder_1.14.0          SingleCellExperiment_1.22.0
## [17] SummarizedExperiment_1.30.2 Biobase_2.60.0             
## [19] GenomicRanges_1.52.1        GenomeInfoDb_1.36.4        
## [21] IRanges_2.34.1              S4Vectors_0.38.2           
## [23] BiocGenerics_0.46.0         MatrixGenerics_1.12.3      
## [25] matrixStats_1.0.0           Seurat_5.0.1               
## [27] SeuratObject_5.0.1          sp_2.1-1                   
## 
## loaded via a namespace (and not attached):
##   [1] spatstat.sparse_3.0-3     bitops_1.0-7             
##   [3] httr_1.4.7                RColorBrewer_1.1-3       
##   [5] tools_4.3.3               sctransform_0.4.1        
##   [7] utf8_1.2.4                R6_2.5.1                 
##   [9] lazyeval_0.2.2            uwot_0.1.16              
##  [11] withr_3.0.0               progressr_0.14.0         
##  [13] cli_3.6.2                 textshaping_0.3.7        
##  [15] spatstat.explore_3.2-5    fastDummies_1.7.3        
##  [17] labeling_0.4.3            sass_0.4.7               
##  [19] spatstat.data_3.0-3       ggridges_0.5.4           
##  [21] pbapply_1.7-2             Rsamtools_2.16.0         
##  [23] systemfonts_1.0.5         scater_1.28.0            
##  [25] parallelly_1.36.0         limma_3.56.2             
##  [27] rstudioapi_0.15.0         generics_0.1.3           
##  [29] BiocIO_1.10.0             ica_1.0-3                
##  [31] spatstat.random_3.2-1     vroom_1.6.4              
##  [33] Matrix_1.6-4              fansi_1.0.6              
##  [35] abind_1.4-5               lifecycle_1.0.4          
##  [37] yaml_2.3.8                edgeR_3.42.4             
##  [39] Rtsne_0.16                glmGamPoi_1.12.2         
##  [41] grid_4.3.3                promises_1.2.1           
##  [43] dqrng_0.3.1               crayon_1.5.2             
##  [45] miniUI_0.1.1.1            lattice_0.22-5           
##  [47] beachmat_2.16.0           cowplot_1.1.1            
##  [49] pillar_1.9.0              metapod_1.8.0            
##  [51] rjson_0.2.21              xgboost_1.7.5.1          
##  [53] future.apply_1.11.0       codetools_0.2-19         
##  [55] leiden_0.4.3              glue_1.7.0               
##  [57] data.table_1.15.0         vctrs_0.6.4              
##  [59] png_0.1-8                 spam_2.10-0              
##  [61] gtable_0.3.4              cachem_1.0.8             
##  [63] xfun_0.40                 S4Arrays_1.2.0           
##  [65] mime_0.12                 survival_3.5-8           
##  [67] statmod_1.5.0             bluster_1.10.0           
##  [69] ellipsis_0.3.2            fitdistrplus_1.1-11      
##  [71] ROCR_1.0-11               nlme_3.1-163             
##  [73] bit64_4.0.5               RcppAnnoy_0.0.21         
##  [75] rprojroot_2.0.3           bslib_0.5.1              
##  [77] irlba_2.3.5.1             vipor_0.4.5              
##  [79] KernSmooth_2.23-22        colorspace_2.1-0         
##  [81] ggrastr_1.0.2             tidyselect_1.2.0         
##  [83] bit_4.0.5                 compiler_4.3.3           
##  [85] BiocNeighbors_1.18.0      DelayedArray_0.26.7      
##  [87] plotly_4.10.3             rtracklayer_1.60.1       
##  [89] scales_1.3.0              lmtest_0.9-40            
##  [91] digest_0.6.33             goftest_1.2-3            
##  [93] spatstat.utils_3.0-4      presto_1.0.0             
##  [95] rmarkdown_2.25            XVector_0.40.0           
##  [97] htmltools_0.5.7           pkgconfig_2.0.3          
##  [99] sparseMatrixStats_1.12.2  highr_0.10               
## [101] fastmap_1.1.1             rlang_1.1.1              
## [103] htmlwidgets_1.6.2         shiny_1.7.5.1            
## [105] DelayedMatrixStats_1.22.6 farver_2.1.1             
## [107] jquerylib_0.1.4           zoo_1.8-12               
## [109] jsonlite_1.8.7            BiocParallel_1.34.2      
## [111] BiocSingular_1.16.0       RCurl_1.98-1.12          
## [113] magrittr_2.0.3            scuttle_1.10.3           
## [115] GenomeInfoDbData_1.2.10   dotCall64_1.1-0          
## [117] patchwork_1.1.3           munsell_0.5.0            
## [119] Rcpp_1.0.11               viridis_0.6.4            
## [121] reticulate_1.34.0         stringi_1.8.3            
## [123] zlibbioc_1.46.0           MASS_7.3-60.0.1          
## [125] plyr_1.8.9                parallel_4.3.3           
## [127] listenv_0.9.0             ggrepel_0.9.4            
## [129] deldir_1.0-9              Biostrings_2.68.1        
## [131] splines_4.3.3             tensor_1.5               
## [133] hms_1.1.3                 locfit_1.5-9.8           
## [135] igraph_1.5.1              spatstat.geom_3.2-7      
## [137] RcppHNSW_0.5.0            reshape2_1.4.4           
## [139] ScaledMatrix_1.8.1        XML_3.99-0.14            
## [141] evaluate_0.22             scran_1.28.2             
## [143] tzdb_0.4.0                httpuv_1.6.12            
## [145] RANN_2.6.1                polyclip_1.10-6          
## [147] future_1.33.0             scattermore_1.2          
## [149] rsvd_1.0.5                xtable_1.8-4             
## [151] restfulr_0.0.15           RSpectra_0.16-1          
## [153] later_1.3.1               viridisLite_0.4.2        
## [155] ragg_1.2.6                beeswarm_0.4.0           
## [157] GenomicAlignments_1.36.0  cluster_2.1.6            
## [159] timechange_0.3.0          globals_0.16.2           
## [161] here_1.0.1